# This script reproduces Tables 1, 5, 8 and Figures 9 & 11 #
rm(list=ls())
options(digits=4)
pkg <- c("PanelMatch", "ggplot2", "plm", 
         "matrixStats",
         "foreign", 
         "stringi", "stringdist",
         "DataCombine","lmtest", "multiwayvcov",
         "data.table",
         "dplyr", 
         "stargazer",
         "clubSandwich", "mediation",
         "doBy")
lapply(pkg, require, character.only = TRUE)

# load datasets
proprietary <- read.csv("proprietary.csv")
load("non_p.RData")

# merge 
d_sub1 <- merge(d_sub1, proprietary, by = c("county_id", "time"), all.x = T)

###
model_area_con <- plm(lead(log_transaction_area,1) ~ 
                        tour + 
                        log_gdppc_l1 + log_gdp_l1 + log_fia_l1 + log_loan_l1 +
                        log_hos_l1 + log_industrial_l1 + log_rev_l1 +
                        log_expd_l1 + log_middle_school_l1 + 
                        msec2currentsec_l1 + 
                        msec2currentgvn_l1 + 
                        mayor2currentsec_l1 + 
                        mayor2currentgvn_l1,
                      index = c("county_id", "month"),
                      effect = "twoways", model = "within",
                      data = d_sub1)
vcov_area_con <- clubSandwich::vcovCR(model_area_con, type="CR1S",
                                      cluster = d_sub1$county_id)
se_area_con <- coeftest(model_area_con, vcov_area_con)
reg_results_land <- list("model" = model_area_con, 
                         "se" = se_area_con)

model_area_con <- plm(lead(log_transaction_area,1 ) ~ 
                        tour + 
                        log_transaction_area_l1 +
                        log_transaction_area_l2 +
                        log_transaction_area_l3 +

                        log_gdppc_l1 + log_gdp_l1 + log_fia_l1 + log_loan_l1 +
                        log_hos_l1 + log_industrial_l1 + log_rev_l1 +
                        log_expd_l1 + log_middle_school_l1 + 
                        
                        msec2currentsec_l1 + 
                        msec2currentgvn_l1 + 
                        mayor2currentsec_l1 + 
                        
                        mayor2currentgvn_l1,
                      index = c("county_id", "month"),
                      effect = "twoways", model = "within",
                      
                      data = d_sub1)  
vcov_area_con <- clubSandwich::vcovCR(model_area_con, type="CR1S", cluster = d_sub1$county_id)
se_area_con <- coeftest(model_area_con, vcov_area_con)
reg_results_lag_land <- list("model" = model_area_con, 
                             "se" = se_area_con)

sink("output/Table1.tex")
stargazer(reg_results_land$model,
          reg_results_lag_land$model,
          font.size = "large", digits = 4, 
          type = "latex",
          dep.var.labels = c("Logged Auction Area Next Month",
                             "Logged Auction Area Next Month"),
          column.labels = c("Logged Auction Area Next Month",
                            "Logged Auction Area Next Month"),
          dep.var.labels.include = F,
          style = "qje", no.space = FALSE, keep.stat = c("n", "rsq"),
          omit = c("county_id", "time"
          ),
          omit.yes.no = c("No", "Yes"),
          add.lines = list(c("County FE?", "Yes", "Yes"),
                           c("Time FE?", "Yes", "Yes"),
                           c("Controls?", "Yes", "Yes"),
                           c("Lagged dependent variable included?", "No", "Yes")),
          
          covariate.labels = c("Inspection",
                               "Logged auction area last month",
                               "Logged auction area two months prior",
                               "Logged auction area three months prior",
                               "logged GDP per capita last year", 
                               "logged total GDP last year", 
                               "logged fixed asset investment last year",
                               "logged volume of loans last year", 
                               "logged number of hospitals last year", 
                               "logged amount of industrial output last year", 
                               "logged revenue last year", 
                               "logged expenditure last year", 
                               "logged number of secondary schools last year",
                               "political connections 1 last year",
                               "political connections 2 last year",
                               "political connections 3 last year",
                               "political connections 4 last year"),
          
          star.cutoffs = c(.10, .05, .01), 
          se = list(reg_results_land$se[,2], 
                    reg_results_lag_land$se[,2]),
          header = F, notes = "robust standard errors clustered by county in parentheses", 
          float = F)
sink()

## Check if results hold when dropping all the never-treated
tours <- tapply(d_sub1$tour, d_sub1$county_id, mean) 
nts <- as.numeric(names(tours[tours == 0]) )
d_sub11 <- d_sub1[!d_sub1$county_id %in% nts, ]
#
model_area_con <- plm(lead(log_transaction_area,1) ~ 
                        tour + 
                        log_gdppc_l1 + log_gdp_l1 + log_fia_l1 + log_loan_l1 +
                        log_hos_l1 + log_industrial_l1 + log_rev_l1 +
                        log_expd_l1 + log_middle_school_l1 + 
                        msec2currentsec_l1 + 
                        msec2currentgvn_l1 + 
                        mayor2currentsec_l1 + 
                        mayor2currentgvn_l1,
                      index = c("county_id", "month"),
                      effect = "twoways", model = "within",
                      
                      data = d_sub11)
vcov_area_con <- clubSandwich::vcovCR(model_area_con, type="CR1S", cluster = d_sub11$county_id)
se_area_con <- coeftest(model_area_con, vcov_area_con)

reg_results_land <- list("model" = model_area_con, 
                         "se" = se_area_con)

lower_area_con_95 <- se_area_con["tour", "Estimate"] - qnorm(.975) * se_area_con["tour", "Std. Error"]
upper_area_con_95 <- se_area_con["tour", "Estimate"] + qnorm(.975) * se_area_con["tour", "Std. Error"]

lower_area_con_90 <- se_area_con["tour", "Estimate"] - qnorm(.95) * se_area_con["tour", "Std. Error"]
upper_area_con_90 <- se_area_con["tour", "Estimate"] + qnorm(.95) * se_area_con["tour", "Std. Error"]
est_nolag <- se_area_con["tour", "Estimate"]

##
model_area_con_lag <- plm(lead(log_transaction_area,1) ~ 
                            tour + 
                            log_transaction_area_l1 +
                            log_transaction_area_l2 +
                            log_transaction_area_l3 +
                            log_gdppc_l1 + log_gdp_l1 + log_fia_l1 + log_loan_l1 +
                            log_hos_l1 + log_industrial_l1 + log_rev_l1 +
                            log_expd_l1 + log_middle_school_l1 + 
                            msec2currentsec_l1 + 
                            msec2currentgvn_l1 + 
                            mayor2currentsec_l1 + 
                            mayor2currentgvn_l1,
                          index = c("county_id", "month"),
                          effect = "twoways", model = "within",
                          data = d_sub11)
vcov_area_con_lag <- clubSandwich::vcovCR(model_area_con_lag, type="CR1S", cluster = d_sub11$county_id)
se_area_con_lag <- coeftest(model_area_con_lag, vcov_area_con_lag)
reg_results_lag_land <- list("model" = model_area_con_lag, 
                             "se" = se_area_con_lag)


lower_area_con_95_lag <- se_area_con_lag["tour", "Estimate"] - qnorm(.975) * se_area_con_lag["tour", "Std. Error"]
upper_area_con_95_lag <- se_area_con_lag["tour", "Estimate"] + qnorm(.975) * se_area_con_lag["tour", "Std. Error"]

lower_area_con_90_lag <- se_area_con_lag["tour", "Estimate"] - qnorm(.95) * se_area_con_lag["tour", "Std. Error"]
upper_area_con_90_lag <- se_area_con_lag["tour", "Estimate"] + qnorm(.95) * se_area_con_lag["tour", "Std. Error"]

est_lag <- se_area_con_lag["tour", "Estimate"]

pdf("output/Figure11.pdf")
plot(x = 0, xlab = "", ylab = "Effect Estimates", 
     pch = "",
     ylim = c(-.28, .01),
     xlim = c(0.95, 1.15),
     xaxt = "n",
     main = "Effects of Inspection \n on Land Auctions Next Month")
lines(c(1,1), c(lower_area_con_95, upper_area_con_95))
lines(c(1,1), c(lower_area_con_90, upper_area_con_90), 
      lwd = 5, col = "grey")
points(1, est_nolag, pch = 19)
mtext(side = 1, "Not controlling for \n lagged outcomes", at = 1, 
      outer = F,
      line = 1.2)

lines(c(1.1,1.1), c(lower_area_con_95_lag, upper_area_con_95_lag))
lines(c(1.1,1.1), c(lower_area_con_90_lag, upper_area_con_90_lag), 
      lwd = 5, col = "grey")
points(1.1, est_lag, pch = 19)
mtext(side = 1, "Controlling for \n lagged outcomes", at = 1.1, 
      outer = F,
      line = 1.2)

abline(h = 0, lty = 3)
dev.off()

### Making a table ###
sink("output/Table8.tex")
stargazer(reg_results_land$model,
          reg_results_lag_land$model,
          font.size = "large", digits = 4, 
          type = "latex",
          dep.var.labels = c("Logged Auction Area Next Month",
                             "Logged Auction Area Next Month"),
          column.labels = c("Logged Auction Area Next Month",
                            "Logged Auction Area Next Month"),
          dep.var.labels.include = F,
          style = "qje", no.space = FALSE, keep.stat = c("n", "rsq"),
          omit = c("county_id", "time"
          ),
          omit.yes.no = c("No", "Yes"),
          add.lines = list(c("County FE?", "Yes", "Yes"),
                           c("Time FE?", "Yes", "Yes"),
                           c("Controls?", "Yes", "Yes"),
                           c("Lagged dependent variable included?", "No", "Yes")),
          covariate.labels = c("Inspection",
                               "Logged auction area last month",
                               "Logged auction area two months prior",
                               "Logged auction area three months prior",
                               "logged GDP per capita last year", 
                               "logged total GDP last year", 
                               "logged fixed asset investment last year",
                               "logged volume of loans last year", 
                               "logged number of hospitals last year", 
                               "logged amount of industrial output last year", 
                               "logged revenue last year", 
                               "logged expenditure last year", 
                               "logged number of secondary schools last year",
                               "political connections 1 last year",
                               "political connections 2 last year",
                               "political connections 3 last year",
                               "political connections 4 last year"),
          star.cutoffs = c(.10, .05, .01), 
          se = list(reg_results_land$se[,2], 
                    reg_results_lag_land$se[,2]),
          p = list(reg_results_land$se[,4], 
                   reg_results_lag_land$se[,4]),
          header = F, notes = "robust standard errors clustered by county in parentheses", 
          float = F)
sink()

## main province year
formula <- as.formula(paste0(paste0("log_transaction_area", "_f", 1), 
                             "~ tour + 
                             log_gdppc_l1 + log_gdp_l1 + log_fia_l1 + log_loan_l1 +
                             log_hos_l1 + log_industrial_l1 + log_rev_l1 +
                             log_expd_l1 + log_middle_school_l1 + 
                             # msec2currentsec2_l1 +      
                             msec2currentsec_l1 + 
                             msec2currentgvn_l1 + 
                             mayor2currentsec_l1 + 
                             #mayor2currentsec2_l1 + 
                             mayor2currentgvn_l1 + 
                             
                             
                             as.factor(county_id) + as.factor(time) + 
                             as.factor(province_id) * as.factor(year)"))


model_area_con <- lm(formula = formula,
                     data = d_sub1)
vcov_area_con <- clubSandwich::vcovCR(model_area_con, type="CR1S", cluster = d_sub1$county_id)
se_area_con <- coeftest(model_area_con, vcov_area_con)

vcov_area_con <- clubSandwich::vcovCR(model_area_con, type="CR1S", cluster = d_sub1$county_id)
se_area_con <- coeftest(model_area_con, vcov_area_con)

reg_results_land <- list("model" = model_area_con, 
                         "se" = se_area_con)

lower_area_con_95 <- se_area_con["tour", "Estimate"] - qnorm(.975) * se_area_con["tour", "Std. Error"]
upper_area_con_95 <- se_area_con["tour", "Estimate"] + qnorm(.975) * se_area_con["tour", "Std. Error"]

lower_area_con_90 <- se_area_con["tour", "Estimate"] - qnorm(.95) * se_area_con["tour", "Std. Error"]
upper_area_con_90 <- se_area_con["tour", "Estimate"] + qnorm(.95) * se_area_con["tour", "Std. Error"]
est_nolag <- se_area_con["tour", "Estimate"]

##
formula <- as.formula(paste0(paste0("log_transaction_area", "_f", 1), 
                             "~ tour + 
                             log_transaction_area_l1 + log_transaction_area_l2 + log_transaction_area_l3 + 
                             log_gdppc_l1 + log_gdp_l1 + log_fia_l1 + log_loan_l1 +
                             log_hos_l1 + log_industrial_l1 + log_rev_l1 +
                             log_expd_l1 + log_middle_school_l1 + 
                             # msec2currentsec2_l1 +      
                             msec2currentsec_l1 + 
                             msec2currentgvn_l1 + 
                             mayor2currentsec_l1 + 
                             #mayor2currentsec2_l1 + 
                             mayor2currentgvn_l1 + 
                             as.factor(county_id) + as.factor(time) + 
                             as.factor(province_id) * as.factor(year)"))
model_area_con_lag <- lm(formula = formula,
                         data = d_sub1)

vcov_area_con_lag <- clubSandwich::vcovCR(model_area_con_lag, type="CR1S", cluster = d_sub1$county_id)
se_area_con_lag <- coeftest(model_area_con_lag, vcov_area_con_lag)

reg_results_lag_land <- list("model" = model_area_con_lag, 
                             "se" = se_area_con_lag)

lower_area_con_95_lag <- se_area_con_lag["tour", "Estimate"] - qnorm(.975) * se_area_con_lag["tour", "Std. Error"]
upper_area_con_95_lag <- se_area_con_lag["tour", "Estimate"] + qnorm(.975) * se_area_con_lag["tour", "Std. Error"]

lower_area_con_90_lag <- se_area_con_lag["tour", "Estimate"] - qnorm(.95) * se_area_con_lag["tour", "Std. Error"]
upper_area_con_90_lag <- se_area_con_lag["tour", "Estimate"] + qnorm(.95) * se_area_con_lag["tour", "Std. Error"]

est_lag <- se_area_con_lag["tour", "Estimate"]

pdf("output/Figure9.pdf")
plot(x = 0, xlab = "", ylab = "Effect Estimates", 
     pch = "",
     ylim = c(-.28, .01),
     xlim = c(0.95, 1.15),
     xaxt = "n",
     main = "Effects of Inspection \n on Land Auctions Next Month")
lines(c(1,1), c(lower_area_con_95, upper_area_con_95))
lines(c(1,1), c(lower_area_con_90, upper_area_con_90), 
      lwd = 5, col = "grey")
points(1, est_nolag, pch = 19)
mtext(side = 1, "Not controlling for \n lagged outcomes", at = 1, 
      outer = F,
      line = 1.2)

lines(c(1.1,1.1), c(lower_area_con_95_lag, upper_area_con_95_lag))
lines(c(1.1,1.1), c(lower_area_con_90_lag, upper_area_con_90_lag), 
      lwd = 5, col = "grey")
points(1.1, est_lag, pch = 19)
mtext(side = 1, "Controlling for \n lagged outcomes", at = 1.1, 
      outer = F,
      line = 1.2)

abline(h = 0, lty = 3)
dev.off()

### Making a table ###
sink("output/Table5.tex")
stargazer(reg_results_land$model,
          reg_results_lag_land$model,
          font.size = "large", digits = 4, 
          type = "latex",
          dep.var.labels = c("Logged Auction Area Next Month",
                             "Logged Auction Area Next Month"),
          column.labels = c("Logged Auction Area Next Month",
                            "Logged Auction Area Next Month"),
          dep.var.labels.include = F,
          style = "qje", no.space = FALSE, keep.stat = c("n", "rsq"),
          omit = c("county_id", "time", "province_id", 
                   "year"
          ),
          omit.yes.no = c("No", "Yes"),
          add.lines = list(c("County FE?", "Yes", "Yes"),
                           c("Time FE?", "Yes", "Yes"),
                           c("Controls?", "Yes", "Yes"),
                           c("Lagged dependent variable included?", "No", "Yes")),
          covariate.labels = c("Inspection",
                               "Logged auction area last month",
                               "Logged auction area two months prior",
                               "Logged auction area three months prior",
                               "logged GDP per capita last year", 
                               "logged total GDP last year", 
                               "logged fixed asset investment last year",
                               "logged volume of loans last year", 
                               "logged number of hospitals last year", 
                               "logged amount of industrial output last year", 
                               "logged revenue last year", 
                               "logged expenditure last year", 
                               "logged number of secondary schools last year",
                               "political connections 1 last year",
                               "political connections 2 last year",
                               "political connections 3 last year",
                               "political connections 4 last year"),
          star.cutoffs = c(.10, .05, .01), 
          se = list(reg_results_land$se[,2], 
                    reg_results_lag_land$se[,2]),
          p = list(reg_results_land$se[,4], 
                   reg_results_lag_land$se[,4]),
          header = F, notes = "robust standard errors clustered by county in parentheses", 
          float = F)
sink()





